#include "kmeans.h"

// Single-swap local search.
//
// Scans every (candidate, center) pair, temporarily replacing the center by
// the candidate. The first swap whose rcost(X, C, m, 2) is below
// (1 - eps) times the current cost is kept and the scan restarts. The search
// ends when a complete scan accepts no swap, or after maxT scans.
//
//   X     - data set handed to rcost
//   C     - starting centers (taken by value; the modified copy is returned)
//   candi - points that may be swapped in as centers
//   m     - forwarded unchanged to rcost (presumably the outlier budget;
//           confirm against rcost's declaration)
//   eps   - minimum relative improvement required to accept a swap
//   maxT  - maximum number of scans
// Returns the centers after the last accepted swap.
dataset one_swap_local_search(dataset X, dataset C, dataset candi, double m, double eps, int maxT) {
	double cst = rcost(X, C, m, 2);
	for (int t = 1; t <= maxT; t++) {
		bool improved = false;
		for (const datapoint& x : candi) {
			for (datapoint& c : C) {
				datapoint saved = c;
				c = x;
				// Evaluate the trial configuration once and reuse the value
				// both for the acceptance test and as the new current cost.
				const double trial = rcost(X, C, m, 2);
				if (trial < (1 - eps) * cst) {
					cst = trial;
					improved = true;
					break;
				}
				c = saved; // not enough improvement: put the old center back
			}
			if (improved) break;
		}
		if (!improved) break; // a full scan accepted nothing
	}
	return C;
}


// Lloyd-style refinement for k-Robust Means.
//
// Each round:
//   1. orders the points by dist(point, C), largest first;
//   2. strips up to m units of weight (slot 0 of each point) from the front
//      of that order, i.e. from the farthest points;
//   3. puts every point that still carries weight into the cluster whose
//      index is slot 0 of NN(point, C) (NN presumably returns the nearest
//      center; confirm against its declaration);
//   4. replaces each non-empty cluster's center by the weighted mean of its
//      points. A cluster that received no points keeps its previous center.
// Iteration stops when a round leaves the centers unchanged or after maxT
// rounds.
//
//   X    - weighted points; X[i][0] is read as the weight of point i
//   C    - initial centers; slot 0 of every center is overwritten with its index
//   m    - total weight that may be discarded in each round
//   maxT - maximum number of rounds
// Returns the refined centers.
dataset Lloyd(dataset X, dataset C, double m, int maxT) {
	dataset Y = X; double mm = m; // untouched copies restored at the start of every round
	for (int t = 1; t <= maxT; t++) {
		m = mm;
		X = Y;
		parti _Clusters; _Clusters.resize(C.size());
		vector<int> id;
		vector<double> d;
		for (int i = 0; i < X.size(); i++) {
			id.push_back(i);
			d.push_back(dist(X[i], C));
		}
		// Capture d by reference: a by-value capture copies the whole vector
		// every time sort copies the comparator.
		sort(id.begin(), id.end(), [&d](int x, int y) {return d[x] > d[y]; });
		for (int i = 0; i < X.size(); i++) X[i] = Y[id[i]];
		for (int i = 0; i < C.size(); i++) C[i][0] = i;
		// Remove weight from the farthest points until the budget m is used up.
		int l = 0;
		for (; l < X.size(); l++) {
			double take = min(X[l][0], m);
			m -= take;
			X[l][0] -= take;
			if (m < 1e-7) break;
		}
		// If the budget outlasted every point, l == X.size() and there is no
		// X[l] to inspect; otherwise skip the point at l when it was emptied.
		if (l < X.size() && X[l][0] < 1e-7) l++;
		for (int i = l; i < X.size(); i++) {
			datapoint x = X[i];
			_Clusters[NN(x, C)[0]].push_back(x);
		}
		dataset curC = C;
		for (int i = 0; i < C.size(); i++) {
			// Test this cluster, not the whole partition: averaging an empty
			// cluster would divide by sum == 0.
			if (_Clusters[i].empty()) continue;
			datapoint c;
			double sum = 0;
			for (int j = 0; j < C[i].size(); j++) c.push_back(0);
			for (datapoint x : _Clusters[i]) {
				c = c + x * x[0];
				sum += x[0];
			}
			c = c / sum;
			c[0] = i; // slot 0 holds the cluster index, not a coordinate
			curC[i] = c;
		}
		if (curC == C) break;
		C = curC;
	}
	return C;
}

// k-Robust Means (Lloyd's style) with 5 independent restarts.
//
// Every restart:
//   1. draws k seed centers from X using randm();
//   2. tries tmeanspp seedings for go = 1, 5, 25, ... up to five times the
//      cost of the random seeds, passing go / m as the distance cap, and
//      keeps whichever seeding has the smallest rcost(X, ., m, 2);
//   3. refines the kept seeding with Lloyd(X, ., m, maxT).
// The refined solution with the smallest rcost over all restarts is returned.
//
//   X    - data set
//   k    - number of centers
//   m    - forwarded to rcost / Lloyd and used to scale the tmeanspp cap
//   maxT - round limit forwarded to Lloyd
//   OPT  - not used by this function; kept so existing callers still compile
// Returns the best centers found, or an empty dataset when X has no points.
dataset kmeansm(dataset X, int k, double m, int maxT, double OPT) {
	dataset C;
	if (X.size() == 0) return C; // no point to seed from; indexing X would be out of bounds
	double cst = -1; // negative means "no restart has been recorded yet"
	for (int i = 1; i <= 5; i++) {
		dataset C_cur;
		// NOTE(review): assumes randm() returns a value in [0, 1) so the
		// index stays below X.size() - confirm against randm's definition.
		for (int j = 1; j <= k; j++) C_cur.push_back(X[(int)(randm() * X.size())]);
		double bestCost = rcost(X, C_cur, m, 2);
		const double LOPT = 1;
		const double UOPT = bestCost * 5;
		for (double go = LOPT; go <= UOPT; go *= 5) {
			dataset CC = tmeanspp(X, k, go / m, 2);
			// Evaluate each seeding once and reuse the value.
			const double seedCost = rcost(X, CC, m, 2);
			if (seedCost < bestCost) {
				bestCost = seedCost;
				C_cur = CC;
			}
		}
		C_cur = Lloyd(X, C_cur, m, maxT);
		const double refinedCost = rcost(X, C_cur, m, 2);
		if (cst < 0 || refinedCost < cst) {
			cst = refinedCost;
			C = C_cur;
		}
	}
	return C;
}

// T-means++ seeding, after Bhaskara et al. (2019).
//
// Picks k centers from X one at a time. Each pick is drawn by a sampler
// initialised with one score per point. Scores start at weight * 1e18 and,
// after every pick, are lowered to at most
//     weight * min(dist(point, pick)^z, maxd)
// where the min with maxd is applied only when maxd > 0.
//
//   X    - weighted points; X[i][0] is read as the weight of point i
//   k    - number of centers to draw
//   maxd - cap on the z-th power of the distance; values <= 0 disable the cap
//   z    - exponent applied to the distance
// Returns the k picked points in the order they were drawn.
dataset tmeanspp(dataset X, const int& k, const double& maxd, int z) {
	const int n = (int)X.size();
	vector<double> score(n);
	for (int p = 0; p < n; ++p) score[p] = X[p][0] * 1e18;

	dataset picked;
	sampler draw;
	int taken = 0;
	while (taken < k) {
		draw.init(score);
		datapoint latest = X[draw.sample()];
		picked.push_back(latest);
		for (int p = 0; p < n; ++p) {
			double contrib = Pow(dist(X[p], latest), z);
			if (maxd > 0) contrib = min(contrib, maxd);
			score[p] = min(score[p], X[p][0] * contrib);
		}
		++taken;
	}
	return picked;
}


// Vanilla (weighted) k-Means.
//
// Seeds with tmeanspp(X, k, -1, 2), draws one more seeding and keeps the one
// with the smaller cost(X, ., 2), then runs at most 10 Lloyd rounds: each
// point goes to the cluster whose index is slot 0 of NN(point, C), and each
// non-empty cluster's center becomes the weighted mean of its points. A
// cluster that received no points keeps its previous center. Iteration stops
// early when a round leaves the centers unchanged.
//
//   X - weighted points; X[i][0] is read as the weight of point i
//   k - number of centers
// Returns the final centers. When the global `debug` flag is set, the input
// data and the centers of every round are written to cerr.
dataset Kmeans(dataset X, const int& k) {
	if (debug) {
		cerr << "Init Data:" << endl;
		for (int i = 0; i < X.size(); i++) {
			cerr << "w: " << X[i][0] << "; ";
			for (int j = 1; j < X[i].size(); j++)
				cerr << X[i][j] << ' ';
			cerr << endl;
		}
	}
	dataset C = tmeanspp(X, k, -1, 2);
	// One additional seeding attempt; keep whichever is cheaper.
	for (int t = 1; t <= 1; t++) {
		dataset curC = tmeanspp(X, k, -1, 2);
		if (cost(X, curC, 2) < cost(X, C, 2)) C = curC;
	}
	for (int rd = 1; rd <= 10; ++rd) {
		if (debug) {
			cerr << "Round: " << rd << endl;
			cerr << "\t Centers: " << endl;
			for (int i = 0; i < C.size(); i++) {
				cerr << "\t ";
				for (int j = 1; j < C[i].size(); j++)
					cerr << C[i][j] << ' ';
				cerr << endl;
			}
		}
		parti _Clusters; _Clusters.resize(k);
		// Slot 0 of each center carries its index so that NN's result can be
		// used to address the cluster.
		for (int i = 0; i < C.size(); i++) C[i][0] = i;
		for (datapoint x : X)
			_Clusters[NN(x, C)[0]].push_back(x);
		dataset curC = C;
		for (int i = 0; i < C.size(); i++) {
			// Test this cluster, not the whole partition: averaging an empty
			// cluster would divide by sum == 0.
			if (_Clusters[i].empty()) continue;
			datapoint c;
			double sum = 0;
			for (int j = 0; j < C[i].size(); j++) c.push_back(0);
			for (datapoint x : _Clusters[i]) {
				c = c + x * x[0];
				sum += x[0];
			}
			c = c / sum;
			c[0] = i; // slot 0 holds the cluster index, not a coordinate
			curC[i] = c;
		}
		if (curC == C) break;
		C = curC;
	}
	return C;
}

// (alpha, beta, gamma)-approximation for k-Robust Means.
//
// Guesses the optimal cost `go` on a geometric grid (factor beta = 10). For
// each guess it draws T seedings of k * xi centers (xi = 3) with
// tmeanspp(X, k * xi, go / m, z) and scores them with rcost using a relaxed
// budget of int((1 + beta) * xi / (xi - 1) * m). The cheapest seeding seen is
// kept, and the upper end of the grid is lowered to its cost.
//
//   X   - data set
//   k   - target number of centers (the result holds k * xi centers whenever
//         a seeding is accepted)
//   m   - budget forwarded to rcost and used to scale the tmeanspp cap
//   z   - distance exponent forwarded to tmeanspp / rcost
//   OPT - guess of the optimal cost; a negative value makes the function
//         derive the grid bounds from k randomly chosen centers
//   T   - seedings drawn per guess
// Returns the best centers found.
dataset Approx_KRMeans(dataset X, const int& k, const double& m, int z, double OPT, int T) {
	double beta = 10;
	int xi = 3;
	dataset C;
	double bestCost = 0;
	// bestCost is meaningful only once haveBest is set. Without the flag the
	// OPT >= 0 path compared candidates against an uninitialized value.
	bool haveBest = false;
	double LOPT, UOPT;
	if (OPT < 0) {
		// NOTE(review): assumes randm() returns a value in [0, 1) and X is
		// non-empty, so the index is valid - confirm against the callers.
		for (int i = 1; i <= k; i++) C.push_back(X[(int)(randm() * X.size())]);
		bestCost = rcost(X, C, m, z);
		haveBest = true;
		LOPT = bestCost / (double)(X.size() - m); UOPT = bestCost * beta;
	}
	else {
		LOPT = UOPT = OPT;
	}
	for (double go = LOPT; go <= UOPT; go *= beta) { //guess the OPT
		for (int t = 1; t <= T; t++) { //amplify the success prob.
			dataset curC = tmeanspp(X, k * xi, go / m, z);
			double curCost = rcost(X, curC, int((1 + beta) * xi / (double)(xi - 1) * m), z);
			if (!haveBest || curCost < bestCost) {
				haveBest = true;
				bestCost = curCost;
				UOPT = bestCost;
				C = curC;
			}
		}
		// A guess that is not positive cannot grow by multiplication, so the
		// loop would never pass UOPT; stop after this single pass.
		if (!(go > 0)) break;
	}
	return C;
}
